# Importing libraries
import pandas as pd

# Loading the data
brands = pd.read_csv("brands.csv") 
finance = pd.read_csv("finance.csv")
info = pd.read_csv("info.csv")
reviews = pd.read_csv("reviews.csv")

# Merge the data and drop null values
merged_df = info.merge(finance, on="product_id")
merged_df = merged_df.merge(reviews, on="product_id")
merged_df = merged_df.merge(brands, on="product_id")
merged_df.dropna(inplace=True)

# final merged data
merged_df.head()

# Add price labels based on listing_price quartiles
twenty_fifth = merged_df["listing_price"].quantile(0.25)
median = merged_df["listing_price"].quantile(0.5)
seventy_fifth = merged_df["listing_price"].quantile(0.75) 
maximum = merged_df["listing_price"].max()
merged_df["price_label"] = pd.cut(merged_df["listing_price"], bins=[0, twenty_fifth, median, seventy_fifth, maximum], labels=["Budget", "Average", "Expensive", "Elite"], include_lowest=True)

# Group by brand and price_label to get volume and mean revenue
adidas_vs_nike = merged_df.groupby(["brand", "price_label"], as_index=False).agg(
    num_products=("price_label", "count"), 
    mean_revenue=("revenue", "mean")
).round(2)

adidas_vs_nike

C:\Users\newbe\AppData\Local\Temp\ipykernel_13972\2545225764.py:9: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  adidas_vs_nike = merged_df.groupby(["brand", "price_label"], as_index=False).agg(

# Store the length of each description
merged_df["description_length"] = merged_df["description"].str.len()

# Upper description length limits
lengthes = [0, 100, 200, 300, 400, 500, 600, 700]

# Description length labels
labels = ["100", "200", "300", "400", "500", "600", "700"]

# Cut into bins
merged_df["description_length"] = pd.cut(merged_df["description_length"], bins=lengthes, labels=labels)



# Group by the bins
description_lengths = merged_df.groupby("description_length", as_index=False).agg(
    mean_rating=("rating", "mean"), 
    total_reviews=("reviews", "sum")
).round(2)

description_lengths

C:\Users\newbe\AppData\Local\Temp\ipykernel_13972\2042162190.py:16: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  description_lengths = merged_df.groupby("description_length", as_index=False).agg(

Columns	Description
`product_id`	Unique product identifier
`brand`	Brand name of the product

Columns	Description
`product_id`	Unique product identifier
`listing_price`	Original price of the product
`sale_price`	Discounted price of the product
`discount`	Discount off the listing price (as a decimal)
`revenue`	Revenue generated by the product

Columns	Description
`product_name`	Name of the product
`product_id`	Unique product identifier
`description`	Description of the product

Columns	Description
`product_id`	Unique product identifier
`rating`	Average rating of the product
`reviews`	Number of reviews for the product

	product_name	product_id	description	listing_price	sale_price	discount	revenue	rating	reviews	brand
1	Women's adidas Originals Sleek Shoes	G27341	A modern take on adidas sport heritage, tailor...	75.99	37.99	0.5	1641.17	3.3	24.0	Adidas
2	Women's adidas Swim Puka Slippers	CM0081	These adidas Puka slippers for women's come wi...	9.99	5.99	0.4	398.93	2.6	37.0	Adidas
3	Women's adidas Sport Inspired Questar Ride Shoes	B44832	Inspired by modern tech runners, these women's...	69.99	34.99	0.5	2204.37	4.1	35.0	Adidas
4	Women's adidas Originals Taekwondo Shoes	D98205	This design is inspired by vintage Taekwondo s...	79.99	39.99	0.5	5182.70	3.5	72.0	Adidas
5	Women's adidas Sport Inspired Duramo Lite 2.0 ...	B75586	Refine your interval training in these women's...	47.99	19.20	0.6	1555.20	1.0	45.0	Adidas

Project Description¶

The data:¶

What is the volume of products and average revenue for Adidas and Nike products based on listing price quartiles?¶

Do any differences exist between the word count of a product's description and its mean rating?¶

We can answer many more questions from the dataset. Thank you!¶

	brand	price_label	num_products	mean_revenue
0	Adidas	Budget	574	2015.68
1	Adidas	Average	655	3035.30
2	Adidas	Expensive	759	4621.56
3	Adidas	Elite	587	8302.78
4	Nike	Budget	357	1596.33
5	Nike	Average	8	675.59
6	Nike	Expensive	47	500.56
7	Nike	Elite	130	1367.45

	description_length	mean_rating	total_reviews
0	100	2.26	36.0
1	200	3.19	17719.0
2	300	3.28	76115.0
3	400	3.29	28994.0
4	500	3.35	4984.0
5	600	3.12	852.0
6	700	3.65	818.0